Oozie Hive Action Extension
配置:job-tracker,name-node,script
和其它必须参数和配置。
指定Hive配置文件: job-xml
和 configuration
,其中 configuration
配置可以覆盖 job-xml
文件中定义的值。 Hadoop mapred.job.tracker
和fs.default.name
属性一定不能在行内configuration
中出现。添加files 和archives ,参见 [WorkflowFunctionalSpec#FilesAchives]
和[Adding Files and Archives for the Job]章节
语法:
<workflow-app name="[WF-DEF-NAME]" xmlns="uri:oozie:workflow:0.1">
...
<action name="[NODE-NAME]">
<hive xmlns="uri:oozie:hive-action:0.2">
<job-tracker>[JOB-TRACKER]</job-tracker>
<name-node>[NAME-NODE]</name-node>
<prepare>
<delete path="[PATH]"/>
...
<mkdir path="[PATH]"/>
...
</prepare>
<job-xml>[HIVE SETTINGS FILE]</job-xml>
<configuration>
<property>
<name>[PROPERTY-NAME]</name>
<value>[PROPERTY-VALUE]</value>
</property>
...
</configuration>
<script>[HIVE-SCRIPT]</script>
<param>[PARAM-VALUE]</param>
...
<param>[PARAM-VALUE]</param>
<file>[FILE-PATH]</file>
...
<archive>[FILE-PATH]</archive>
...
</hive>
<ok to="[NODE-NAME]"/>
<error to="[NODE-NAME]"/>
</action>
...
</workflow-app>
prepare
元素 ,指定路径将删除或创建在启动作业前,路径必须开头: hdfs://HOST:PORT
job-xml
为Hive作业配置文件,在schema 0.3中,可以添加多个job-xml
,指定多个配置
configuration
包含的配置信息将被提交到Hive作业中。
script
包含要执行的Hive脚本路径, 脚本可以用 ${VARIABLE}
变量,可以通过 params
元素来指定这些变量的值
params
包含提交到Hive脚本中的参数
所有参数都可以模板化:
<workflow-app name="sample-wf" xmlns="uri:oozie:workflow:0.1">
...
<action name="myfirsthivejob">
<hive xmlns="uri:oozie:hive-action:0.2">
<job-tracker>foo:8021</job-tracker>
<name-node>bar:8020</name-node>
<prepare>
<delete path="${jobOutput}"/>
</prepare>
<configuration>
<property>
<name>mapred.compress.map.output</name>
<value>true</value>
</property>
</configuration>
<script>myscript.q</script>
<param>InputDir=/home/tucu/input-data</param>
<param>OutputDir=${jobOutput}</param>
</hive>
<ok to="myotherjob"/>
<error to="errorcleanup"/>
</action>
...
</workflow-app>
默认Hive和Site配置文件:Hive (as of Hive 0.8)忽略hive-default.xml
文件,Oozie (as of Oozie 3.4)忽略oozie.hive.defaults
属性
Hive Action日志:日志信息直接显示在STDOUT/STDERR
设置日志等级使用属性:oozie.hive.log.level
,默认INFO
Hvie Xml-Schema
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:hive="uri:oozie:hive-action:0.5" elementFormDefault="qualified"
targetNamespace="uri:oozie:hive-action:0.5">
.
<xs:element name="hive" type="hive:ACTION"/>
.
<xs:complexType name="ACTION">
<xs:sequence>
<xs:element name="job-tracker" type="xs:string" minOccurs="0" maxOccurs="1"/>
<xs:element name="name-node" type="xs:string" minOccurs="0" maxOccurs="1"/>
<xs:element name="prepare" type="hive:PREPARE" minOccurs="0" maxOccurs="1"/>
<xs:element name="job-xml" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="configuration" type="hive:CONFIGURATION" minOccurs="0" maxOccurs="1"/>
<xs:element name="script" type="xs:string" minOccurs="1" maxOccurs="1"/>
<xs:element name="param" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="argument" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="file" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="archive" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
</xs:sequence>
</xs:complexType>
.
<xs:complexType name="CONFIGURATION">
<xs:sequence>
<xs:element name="property" minOccurs="1" maxOccurs="unbounded">
<xs:complexType>
<xs:sequence>
<xs:element name="name" minOccurs="1" maxOccurs="1" type="xs:string"/>
<xs:element name="value" minOccurs="1" maxOccurs="1" type="xs:string"/>
<xs:element name="description" minOccurs="0" maxOccurs="1" type="xs:string"/>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
.
<xs:complexType name="PREPARE">
<xs:sequence>
<xs:element name="delete" type="hive:DELETE" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="mkdir" type="hive:MKDIR" minOccurs="0" maxOccurs="unbounded"/>
</xs:sequence>
</xs:complexType>
.
<xs:complexType name="DELETE">
<xs:attribute name="path" type="xs:string" use="required"/>
</xs:complexType>
.
<xs:complexType name="MKDIR">
<xs:attribute name="path" type="xs:string" use="required"/>
</xs:complexType>
.
</xs:schema>
Hive Action Schema Version 0.4
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:hive="uri:oozie:hive-action:0.4" elementFormDefault="qualified"
targetNamespace="uri:oozie:hive-action:0.4">
.
<xs:element name="hive" type="hive:ACTION"/>
.
<xs:complexType name="ACTION">
<xs:sequence>
<xs:element name="job-tracker" type="xs:string" minOccurs="0" maxOccurs="1"/>
<xs:element name="name-node" type="xs:string" minOccurs="0" maxOccurs="1"/>
<xs:element name="prepare" type="hive:PREPARE" minOccurs="0" maxOccurs="1"/>
<xs:element name="job-xml" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="configuration" type="hive:CONFIGURATION" minOccurs="0" maxOccurs="1"/>
<xs:element name="script" type="xs:string" minOccurs="1" maxOccurs="1"/>
<xs:element name="param" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="file" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="archive" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
</xs:sequence>
</xs:complexType>
.
<xs:complexType name="CONFIGURATION">
<xs:sequence>
<xs:element name="property" minOccurs="1" maxOccurs="unbounded">
<xs:complexType>
<xs:sequence>
<xs:element name="name" minOccurs="1" maxOccurs="1" type="xs:string"/>
<xs:element name="value" minOccurs="1" maxOccurs="1" type="xs:string"/>
<xs:element name="description" minOccurs="0" maxOccurs="1" type="xs:string"/>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
.
<xs:complexType name="PREPARE">
<xs:sequence>
<xs:element name="delete" type="hive:DELETE" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="mkdir" type="hive:MKDIR" minOccurs="0" maxOccurs="unbounded"/>
</xs:sequence>
</xs:complexType>
.
<xs:complexType name="DELETE">
<xs:attribute name="path" type="xs:string" use="required"/>
</xs:complexType>
.
<xs:complexType name="MKDIR">
<xs:attribute name="path" type="xs:string" use="required"/>
</xs:complexType>
.
</xs:schema>
Hive Action Schema Version 0.3
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:hive="uri:oozie:hive-action:0.3" elementFormDefault="qualified"
targetNamespace="uri:oozie:hive-action:0.3">
.
<xs:element name="hive" type="hive:ACTION"/>
.
<xs:complexType name="ACTION">
<xs:sequence>
<xs:element name="job-tracker" type="xs:string" minOccurs="1" maxOccurs="1"/>
<xs:element name="name-node" type="xs:string" minOccurs="1" maxOccurs="1"/>
<xs:element name="prepare" type="hive:PREPARE" minOccurs="0" maxOccurs="1"/>
<xs:element name="job-xml" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="configuration" type="hive:CONFIGURATION" minOccurs="0" maxOccurs="1"/>
<xs:element name="script" type="xs:string" minOccurs="1" maxOccurs="1"/>
<xs:element name="param" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="file" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="archive" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
</xs:sequence>
</xs:complexType>
.
<xs:complexType name="CONFIGURATION">
<xs:sequence>
<xs:element name="property" minOccurs="1" maxOccurs="unbounded">
<xs:complexType>
<xs:sequence>
<xs:element name="name" minOccurs="1" maxOccurs="1" type="xs:string"/>
<xs:element name="value" minOccurs="1" maxOccurs="1" type="xs:string"/>
<xs:element name="description" minOccurs="0" maxOccurs="1" type="xs:string"/>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
.
<xs:complexType name="PREPARE">
<xs:sequence>
<xs:element name="delete" type="hive:DELETE" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="mkdir" type="hive:MKDIR" minOccurs="0" maxOccurs="unbounded"/>
</xs:sequence>
</xs:complexType>
.
<xs:complexType name="DELETE">
<xs:attribute name="path" type="xs:string" use="required"/>
</xs:complexType>
.
<xs:complexType name="MKDIR">
<xs:attribute name="path" type="xs:string" use="required"/>
</xs:complexType>
.
</xs:schema>
Hive Action Schema Version 0.2
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:hive="uri:oozie:hive-action:0.2" elementFormDefault="qualified"
targetNamespace="uri:oozie:hive-action:0.2">
.
<xs:element name="hive" type="hive:ACTION"/>
.
<xs:complexType name="ACTION">
<xs:sequence>
<xs:element name="job-tracker" type="xs:string" minOccurs="1" maxOccurs="1"/>
<xs:element name="name-node" type="xs:string" minOccurs="1" maxOccurs="1"/>
<xs:element name="prepare" type="hive:PREPARE" minOccurs="0" maxOccurs="1"/>
<xs:element name="job-xml" type="xs:string" minOccurs="0" maxOccurs="1"/>
<xs:element name="configuration" type="hive:CONFIGURATION" minOccurs="0" maxOccurs="1"/>
<xs:element name="script" type="xs:string" minOccurs="1" maxOccurs="1"/>
<xs:element name="param" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="file" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="archive" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
</xs:sequence>
</xs:complexType>
.
<xs:complexType name="CONFIGURATION">
<xs:sequence>
<xs:element name="property" minOccurs="1" maxOccurs="unbounded">
<xs:complexType>
<xs:sequence>
<xs:element name="name" minOccurs="1" maxOccurs="1" type="xs:string"/>
<xs:element name="value" minOccurs="1" maxOccurs="1" type="xs:string"/>
<xs:element name="description" minOccurs="0" maxOccurs="1" type="xs:string"/>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
.
<xs:complexType name="PREPARE">
<xs:sequence>
<xs:element name="delete" type="hive:DELETE" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="mkdir" type="hive:MKDIR" minOccurs="0" maxOccurs="unbounded"/>
</xs:sequence>
</xs:complexType>
.
<xs:complexType name="DELETE">
<xs:attribute name="path" type="xs:string" use="required"/>
</xs:complexType>
.
<xs:complexType name="MKDIR">
<xs:attribute name="path" type="xs:string" use="required"/>
</xs:complexType>
.
</xs:schema>